This data set was generated by Reddit user yuxbni76 (https://www.reddit.com/user/yuxbni76). The data was scraped from ESPN.com and covers the 2002 through 2019 seasons, up to week 6 of 2019. Three games were missing from the data set (oddly, all three fall near holidays),
so I looked them up on https://www.pro-football-reference.com/ and added them to the data set before importing.
# Load the game-level NFL data set (one row per game).
# stringsAsFactors is set explicitly so text columns are read as character on
# every R version (the default was TRUE before R 4.0.0); the team-name columns
# are converted to factors deliberately further down.
nfl_stats <- read.csv(
  "nfl_dataset_2002-2019week6.csv",
  stringsAsFactors = FALSE
)
# Per-column overview: shows which columns were read as numbers and which as
# text, plus the value ranges of the numeric ones.
summary(nfl_stats)
## date away home first_downs_away
## Length:4631 Length:4631 Length:4631 Min. : 3.00
## Class :character Class :character Class :character 1st Qu.:15.00
## Mode :character Mode :character Mode :character Median :19.00
## Mean :18.78
## 3rd Qu.:22.00
## Max. :37.00
## first_downs_home third_downs_away third_downs_home fourth_downs_away
## Min. : 3.00 Length:4631 Length:4631 Length:4631
## 1st Qu.:16.00 Class :character Class :character Class :character
## Median :20.00 Mode :character Mode :character Mode :character
## Mean :19.78
## 3rd Qu.:23.00
## Max. :40.00
## fourth_downs_home passing_yards_away passing_yards_home rushing_yards_away
## Length:4631 Min. : -7.0 Min. : 6.0 Min. :-18.0
## Class :character 1st Qu.:164.0 1st Qu.:172.0 1st Qu.: 73.0
## Mode :character Median :217.0 Median :221.0 Median :103.0
## Mean :219.9 Mean :226.6 Mean :109.7
## 3rd Qu.:273.0 3rd Qu.:276.0 3rd Qu.:139.0
## Max. :516.0 Max. :522.0 Max. :351.0
## rushing_yards_home total_yards_away total_yards_home comp_att_away
## Min. : -3.0 Min. : 26.0 Min. : 77.0 Length:4631
## 1st Qu.: 81.0 1st Qu.:270.0 1st Qu.:286.0 Class :character
## Median :112.0 Median :329.0 Median :343.0 Mode :character
## Mean :117.8 Mean :329.6 Mean :344.4
## 3rd Qu.:148.0 3rd Qu.:389.0 3rd Qu.:400.0
## Max. :378.0 Max. :643.0 Max. :653.0
## comp_att_home sacks_away sacks_home rushing_attempts_away
## Length:4631 Length:4631 Length:4631 Min. : 6.00
## Class :character Class :character Class :character 1st Qu.:21.00
## Mode :character Mode :character Mode :character Median :26.00
## Mean :26.59
## 3rd Qu.:32.00
## Max. :57.00
## rushing_attempts_home fumbles_away fumbles_home int_away
## Min. : 6.00 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:22.00 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :28.00 Median :0.0000 Median :0.000 Median :1.0000
## Mean :27.83 Mean :0.6597 Mean :0.653 Mean :0.9836
## 3rd Qu.:33.00 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:2.0000
## Max. :60.00 Max. :5.0000 Max. :4.000 Max. :6.0000
## int_home turnovers_away turnovers_home penalties_away
## Min. :0.000 Min. :0.000 Min. :0.000 Length:4631
## 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:1.000 Class :character
## Median :1.000 Median :1.000 Median :1.000 Mode :character
## Mean :0.916 Mean :1.643 Mean :1.569
## 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :6.000 Max. :8.000 Max. :7.000
## penalties_home redzone_away redzone_home drives_away
## Length:4631 Length:4631 Length:4631 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.:11.00
## Mode :character Mode :character Mode :character Median :12.00
## Mean :12.48
## 3rd Qu.:14.00
## Max. :26.00
## drives_home def_st_td_away def_st_td_home possession_away
## Min. : 0.0 Min. :0.0000 Min. :0.0000 Length:4631
## 1st Qu.:11.0 1st Qu.:0.0000 1st Qu.:0.0000 Class :character
## Median :12.0 Median :0.0000 Median :0.0000 Mode :character
## Mean :12.4 Mean :0.3468 Mean :0.3701
## 3rd Qu.:14.0 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :25.0 Max. :6.0000 Max. :6.0000
## possession_home score_away score_home
## Length:4631 Min. : 0.00 Min. : 0.0
## Class :character 1st Qu.:14.00 1st Qu.:16.0
## Mode :character Median :20.00 Median :23.0
## Mean :20.82 Mean :23.3
## 3rd Qu.:27.00 3rd Qu.:30.0
## Max. :59.00 Max. :62.0
# Convert columns that make sense to factor
## Home and away team names
# Both team-name columns are categorical: convert them to factors in one pass
# (levels are the sorted unique team names found in each column).
nfl_stats[c("home", "away")] <- lapply(
  nfl_stats[c("home", "away")],
  as.factor
)
Team colors were taken from https://teamcolorcodes.com. I used the first primary color listed for each team and stored the colors in a named vector for later use. For the Browns and Titans I used the secondary color instead, as it seemed more appropriate.
# Named character vector: team name -> hex colour.
# Passed as `values` to scale_fill_manual() / scale_color_manual() in the
# plots below, so the names must match the team names in the data.
Team_colors <- c(
  `49ers`    = "#AA0000",
  Bears      = "#0B162A",
  Bengals    = "#FB4F14",
  Bills      = "#00338D",
  Broncos    = "#FB4F14",
  Browns     = "#FF3C00",
  Buccaneers = "#D50A0A",
  Cardinals  = "#97233F",
  Chargers   = "#0080C6",
  Chiefs     = "#E31837",
  Colts      = "#002C5F",
  Cowboys    = "#041E42",
  Dolphins   = "#008E97",
  Eagles     = "#004C54",
  Falcons    = "#A71930",
  Giants     = "#0B2265",
  Jaguars    = "#006778",
  Jets       = "#125740",
  Lions      = "#0076B6",
  Packers    = "#203731",
  Panthers   = "#0085CA",
  Patriots   = "#002244",
  Raiders    = "#000000",
  Rams       = "#003594",
  Ravens     = "#241773",
  Redskins   = "#773141",
  Saints     = "#D3BC8D",
  Seahawks   = "#002244",
  Steelers   = "#FFB612",
  Texans     = "#03202F",
  Titans     = "#4B92DB",
  Vikings    = "#4F2683"
)
# Number of games each team played at home: one bar per team, filled with the
# team's colour. The legend is hidden because the x-axis already names teams.
A <- ggplot(nfl_stats, aes(x = home, fill = home)) +
  geom_bar() +
  scale_fill_manual(values = Team_colors) +
  labs(title = "Home Games", y = "# Games") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "none"
  )

# Same chart for games played away.
B <- ggplot(nfl_stats, aes(x = away, fill = away)) +
  geom_bar() +
  scale_fill_manual(values = Team_colors) +
  labs(title = "Away Games", y = "# Games") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "none"
  )

# Show the two charts together in one figure.
ggarrange(plotlist = list(A, B))
# wins by team
# For every home team, count the games it hosted and how many of those it won
# (home score strictly greater than away score), then draw wins and losses as
# a stacked bar per team.
Teams_32.list <- levels(nfl_stats$home)

# TRUE where the home side won. A comparison involving a missing score yields
# NA; treat that as "not a win" so it cannot be counted as a row.
home_won <- nfl_stats$score_home > nfl_stats$score_away
home_won[is.na(home_won)] <- FALSE

# table() on the factor returns one count per level, in level order, and
# reports 0 for a team with no matching rows.
games_hosted <- as.integer(table(nfl_stats$home))
home_wins <- as.integer(table(nfl_stats$home[home_won]))

# Named list with one c(games, wins) entry per team (same shape as before, in
# case later code reads it).
numb_wins_list <- setNames(Map(c, games_hosted, home_wins), Teams_32.list)

Home_wins_n_losses <- data.frame(
  Games = games_hosted,
  Wins = home_wins,
  row.names = Teams_32.list
)
# NOTE: "Losses" is everything that is not a win, so tied games are counted
# here as losses.
Home_wins_n_losses$Losses <- Home_wins_n_losses$Games - Home_wins_n_losses$Wins
Home_wins_n_losses$Teams <- rownames(Home_wins_n_losses)

# Long format (Teams, variable = Wins/Losses, value = count) for stacking;
# the Games column (first column) is dropped because it is the sum of the two.
long <- reshape2::melt(Home_wins_n_losses[, -1], id.vars = "Teams")

ggplot(long, aes(x = Teams, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Sequential game index (1 = first row of the data set), used as the x-axis.
# seq_len() also handles a zero-row data frame, where seq(1, 0, 1) errors.
nfl_stats$game_number <- seq_len(nrow(nfl_stats))

# All teams
# Smoothed home passing yards per team across all games, with alternating
# shaded bands to mark seasons.
#
# Band positions assume the rows are in chronological order with 267 rows per
# season starting in 2002 (TODO confirm against the data). Every other season
# gets a translucent dark band; the seasons in between stay unshaded.
games_per_season <- 267
n_games <- nrow(nfl_stats)
season_idx <- seq_len(ceiling(n_games / games_per_season)) - 1
shaded_idx <- season_idx[season_idx %% 2 == 0]
# The first band starts at game 1 (not 0); no band runs past the last game.
band_xmin <- pmax(shaded_idx * games_per_season, 1)
band_xmax <- pmin((shaded_idx + 1) * games_per_season, n_games)

X <- ggplot(
  nfl_stats,
  aes(
    x = game_number, y = passing_yards_home, color = home,
    label = score_home, label2 = score_away
  )
) +
  geom_smooth(se = FALSE) +
  scale_color_manual(values = Team_colors) +
  # One vectorised layer draws all shaded season bands.
  annotate(
    "rect",
    xmin = band_xmin, xmax = band_xmax, ymin = -Inf, ymax = Inf,
    fill = "black", alpha = 0.25
  ) +
  # Label the first and the last shaded band with their season.
  annotate(
    "text",
    x = c(134, 4405), y = 310, label = c("2002", "2018"), vjust = -0.5
  )
X
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Save the all-teams passing-yards plot as a 6 x 6 PNG (ggsave's default units).
ggsave(
  filename = "Passing_yards_home2002-2018.png",
  plot = X,
  width = 6,
  height = 6
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Per-team passing yards over time, with each game also drawn as a point.
# The label/label2/label3 aesthetics are not used by these geoms; presumably
# they are mapped only so the date and both scores are available to the
# ggplotly() tooltip.
P <- ggplot(
  nfl_stats,
  aes(
    x = game_number, y = passing_yards_home, color = home,
    label = date, label2 = score_home, label3 = score_away
  )
) +
  geom_smooth(se = FALSE) +
  geom_point() +
  scale_color_manual(values = Team_colors)

# Same plot from the away team's perspective.
Q <- ggplot(
  nfl_stats,
  aes(
    x = game_number, y = passing_yards_away, color = away,
    label = date, label2 = score_home, label3 = score_away
  )
) +
  geom_smooth(se = FALSE) +
  geom_point() +
  scale_color_manual(values = Team_colors)

# Interactive version of the home plot; the tooltip is limited to these fields.
ggplotly(
  P,
  tooltip = c("passing_yards_home", "home", "date", "score_home", "score_away")
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Interactive version of the away plot; the tooltip is limited to these fields.
ggplotly(
  p = Q,
  tooltip = c("passing_yards_away", "away", "date", "score_home", "score_away")
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# ```{r}
# AFC WEST
# NOTE(review): `==` against a four-element vector recycles the team names
# row by row; `%in%` is probably what was intended here.
# ggplot(nfl_stats[nfl_stats$home == c("Chiefs", "Raiders", "Chargers", "Broncos"), ],
#        aes(x = game_number, y = passing_yards_home, color = home)) +
#   geom_smooth(se = FALSE) + scale_color_manual(values = Team_colors)
# ggplot(nfl_stats[nfl_stats$home == c("Chiefs", "Raiders", "Chargers", "Broncos"), ],
#        aes(x = game_number, y = rushing_yards_home, color = home)) +
#   geom_smooth(se = FALSE) + scale_color_manual(values = Team_colors)
# ```